Exploring performance#

In this demo, we explore a full Carnatic music concert recording, extracting low-level and high-level features with the tools available in compIAM.

## Installing (if not) and importing compiam to the project
# Install each dependency only if it is missing from the current environment.
# torch and tensorflow are pinned to versions known to work with compiam.
import importlib.util
if importlib.util.find_spec('compiam') is None:
    %pip install compiam
if importlib.util.find_spec('essentia') is None:
    %pip install essentia
if importlib.util.find_spec('torch') is None:
    %pip install "torch==1.13"
if importlib.util.find_spec('tensorflow') is None:
    %pip install "tensorflow==2.15.0" "keras<3"

import compiam
import essentia.standard as estd

# Import extras and suppress warnings to keep the tutorial clean
import os
import gdown
import zipfile

import numpy as np
import IPython.display as ipd

from pprint import pprint

import warnings
warnings.filterwarnings('ignore')

# Folder where the demo audio is downloaded/extracted, and the concert artist
AUDIO_PATH = os.path.join("..", "audio", "demos")
ARTIST = "dr-brindha-manickavasakan"
[   INFO   ] MusicExtractorSVM: no classifier models were configured by default

We will work on a concert led by Dr. Brindha Manickavasakan, a well-known Carnatic music performer and doctor who has been very much involved in our research efforts. This concert took place during the December Season 2023 in Chennai, India, at the well-known Arkay Convention Centre. Please note that this recording in particular is part of the newly published Saraga Audiovisual Dataset (A. Shankar et al., 2024), which will soon be available for access through mirdata and compIAM.

For now, we will download this particular concert and explore a given rendition.

# Direct-download link to the zipped concert recordings on Google Drive
url = "https://drive.google.com/uc?id=1iR0bfxDLQbH8fEeHU_GFsg2kh7brZ0HZ&export=download"
# Destination path for the archive inside the demo audio folder
output = os.path.join(AUDIO_PATH, "dr-brindha-manickavasakan.zip")

# Fetch the archive (progress bar enabled)
gdown.download(url, output, quiet=False)

Once the audio is downloaded, we can extract all the files and remove the .zip file.

# Unzip file
with zipfile.ZipFile(output, 'r') as zip_ref:
    zip_ref.extractall(AUDIO_PATH)

# Delete zip file after extraction
os.remove(output)

Loading and visualising the data#

rendition = "Bhavanuta"  # Selecting a rendition from the concert to analyse

We use Essentia to first load the mixture audio of the concert. The function AudioLoader can be used to load an audio signal from a file path and return the signal plus some important technical information about it.

# Path to the mixture recording of the selected rendition
file_path = os.path.join(AUDIO_PATH, ARTIST, rendition, rendition + ".wav")

# AudioLoader returns several outputs (signal, sample rate, channels, ...);
# we only keep the audio signal itself here
loader_outputs = estd.AudioLoader(filename=file_path)()
audio_mix = loader_outputs[0].T  # transpose so channels come first

Let’s quickly listen to 30 seconds of this incredible performance!

# Play the first 30 seconds of the mixture (sampled at 44.1 kHz)
ipd.Audio(audio_mix[..., :44100*30], rate=44100)

Low level feature extraction#

Tonic Identification#

Music Source Separation#

Before analysing the melody, we isolate the singing voice from the mixture using a source separation model trained for Carnatic music, which makes the melodic line easier to study.

from compiam import load_model

# Diffusion-based vocal separation model for Carnatic music.
# Note: this model runs on tensorflow in the backend!
vocal_separator = load_model("separation:cold-diff-sep")

# Separate the singing voice from the mixture and inspect the output shape
separated_vocals = vocal_separator.separate(audio_mix)
separated_vocals.shape
(11343744,)
# Play the first 30 seconds of the separated vocal track
ipd.Audio(separated_vocals[..., :44100*30], rate=44100)

For further reference, please visit the music source separation page.

Pitch Extraction#

from compiam import load_model
from compiam.melody.pitch_extraction import Melodia

# Knowledge-based pitch extractor: Melodia (Salamon et al. 2012)
melodia = Melodia()

# Data-driven pitch extractor: FTANet trained on Carnatic music
# (Plaja-Roglans et al. 2023). This model uses tensorflow in the backend!
ftanet_carnatic = load_model("melody:ftanet-carnatic")

Predict the melody using both methods.

# NOTE: handing the in-memory array to `extract` triggers a broken
# resampling branch in compiam's Melodia wrapper (essentia's Resample is
# invoked without its input — see the ValueError above), so we pass the
# audio file path instead and let each method load the signal itself.
melodia_pitch_track = melodia.extract(file_path)
ftanet_pitch_track = ftanet_carnatic.predict(
    file_path,
    out_step=melodia_pitch_track[1, 0],  # Interpolating to same size
)
[2024-11-28 12:48:26,594] WARNING [compiam.melody.pitch_extraction.melodia.extract:90] Resampling... (input sampling rate is 44100Hz, make sure this is correct)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In [12], line 1
----> 1 melodia_pitch_track = melodia.extract(audio_mix)
      2 ftanet_pitch_track = ftanet_carnatic.predict(
      3     audio_mix,
      4     out_step=melodia_pitch_track[1, 0],  # Interpolating to same size
      5 )

File /opt/hostedtoolcache/Python/3.11.10/x64/lib/python3.11/site-packages/compiam/melody/pitch_extraction/melodia.py:93, in Melodia.extract(self, input_data, input_sr, out_step)
     89 elif isinstance(input_data, np.ndarray):
     90     logger.warning(
     91         f"Resampling... (input sampling rate is {input_sr}Hz, make sure this is correct)"
     92     )
---> 93     resample_audio = estd.Resample(
     94         inputSampleRate=input_sr, outputSampleRate=self.sample_rate
     95     )()
     96     input_data = resample_audio(input_data)
     97     audio = estd.EqualLoudness(signal=input_data)()

File /opt/hostedtoolcache/Python/3.11.10/x64/lib/python3.11/site-packages/essentia/standard.py:123, in _create_essentia_class.<locals>.Algo.__call__(self, *args)
    122 def __call__(self, *args):
--> 123     return self.compute(*args)

File /opt/hostedtoolcache/Python/3.11.10/x64/lib/python3.11/site-packages/essentia/standard.py:70, in _create_essentia_class.<locals>.Algo.compute(self, *args)
     67 inputNames = self.inputNames()
     69 if len(args) != len(inputNames):
---> 70     raise ValueError(name+'.compute requires '+str(len(inputNames))+' argument(s), '+str(len(args))+' given')
     72 # we have to make some exceptions for YamlOutput and PoolAggregator
     73 # because they expect cpp Pools
     74 if name in ('YamlOutput', 'PoolAggregator', 'SvmClassifier', 'PCA', 'GaiaTransform', 'TensorflowPredict'):

ValueError: Resample.compute requires 1 argument(s), 0 given

Let’s visualize from sec. 4 to sec. 10 of the performance, together with the predicted pitch tracks using both methods.

import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt

# Load the rendition from the path defined earlier. (The original cell
# referenced an undefined `example` object, raising a NameError.)
y, sr = librosa.load(file_path)
fig, ax = plt.subplots(nrows=1, ncols=1, sharex=True, figsize=(15, 12))
# Log-magnitude spectrogram as visual context behind the pitch curves
D = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
img = librosa.display.specshow(D, y_axis='linear', x_axis='time', sr=sr, ax=ax);
ax.set_ylim(0, 2000)  # the melody lives well below 2 kHz
ax.set_xlim(4, 10)    # zoom into sec. 4 to sec. 10
plt.plot(
    melodia_pitch_track[:, 0], melodia_pitch_track[:, 1],
    color="white", label="Melodia",
)
plt.plot(
    ftanet_pitch_track[:, 0], ftanet_pitch_track[:, 1],
    color="black", label="FTANet-Carnatic",
)
plt.legend()
plt.show()
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In [13], line 6
      3 import numpy as np
      4 import matplotlib.pyplot as plt
----> 6 y, sr = librosa.load(example.audio_path)
      7 fig, ax = plt.subplots(nrows=1, ncols=1, sharex=True, figsize=(15, 12))
      8 D = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)

NameError: name 'example' is not defined

For further reference, please visit the pitch extraction page.

Percussion onset detection#

High level feature extraction#

Melodic pattern discovery#

Raga recognition#

from compiam import load_model

# DEEPSRGM raga recognition model.
# This model uses tensorflow in the backend!
deepsrgm = load_model("melody:deepsrgm")

# The feature extractor expects a mono signal (essentia VECTOR_REAL), but
# `audio_mix` is stereo with channels first (see the TypeError above), so
# we downmix by averaging the two channels before the call.
feat = deepsrgm.get_features(np.mean(audio_mix, axis=0))
[2024-11-28 12:48:28,017] WARNING [compiam.melody.raga_recognition.deepsrgm.get_features:242] Resampling... (input sampling rate is {input_sr}Hz, make sure this is correct)
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
File /opt/hostedtoolcache/Python/3.11.10/x64/lib/python3.11/site-packages/essentia/standard.py:97, in _create_essentia_class.<locals>.Algo.compute(self, *args)
     96 try:
---> 97     convertedData = _c.convertData(arg, goalType)
     98 except TypeError:

File /opt/hostedtoolcache/Python/3.11.10/x64/lib/python3.11/site-packages/essentia/common.py:349, in convertData(data, goalType)
    347         return [[col for col in row] for row in data]
--> 349 raise TypeError('Cannot convert data from type %s (%s) to type %s' %
    350                 (str(origType), str(type(data)), str(goalType)))

TypeError: Cannot convert data from type MATRIX_REAL (<class 'numpy.ndarray'>) to type VECTOR_REAL

During handling of the above exception, another exception occurred:

TypeError                                 Traceback (most recent call last)
Cell In [15], line 1
----> 1 feat = deepsrgm.get_features(audio_mix)

File /opt/hostedtoolcache/Python/3.11.10/x64/lib/python3.11/site-packages/compiam/melody/raga_recognition/deepsrgm/__init__.py:248, in DEEPSRGM.get_features(self, input_data, input_sr, pitch_path, tonic_path, from_mirdata, track_id, k)
    242     logger.warning(
    243         "Resampling... (input sampling rate is {input_sr}Hz, make sure this is correct)"
    244     )
    245     resampling = estd.Resample(
    246         inputSampleRate=input_sr, outputSampleRate=self.sample_rate
    247     )
--> 248     audio = resampling(input_data)
    249 else:
    250     raise ValueError("Input must be path to audio signal or an audio array")

File /opt/hostedtoolcache/Python/3.11.10/x64/lib/python3.11/site-packages/essentia/standard.py:123, in _create_essentia_class.<locals>.Algo.__call__(self, *args)
    122 def __call__(self, *args):
--> 123     return self.compute(*args)

File /opt/hostedtoolcache/Python/3.11.10/x64/lib/python3.11/site-packages/essentia/standard.py:99, in _create_essentia_class.<locals>.Algo.compute(self, *args)
     97         convertedData = _c.convertData(arg, goalType)
     98     except TypeError:
---> 99         raise TypeError('Error cannot convert argument %s to %s' \
    100               %(str(_c.determineEdt(arg)), str(goalType)))
    102     convertedArgs.append(convertedData)
    104 results = self.__compute__(*convertedArgs)

TypeError: Error cannot convert argument MATRIX_REAL to VECTOR_REAL
# Run raga inference on the extracted features and show the prediction
predicted_raga = deepsrgm.predict(feat)
predicted_raga
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In [16], line 1
----> 1 predicted_raga = deepsrgm.predict(feat)
      2 predicted_raga

NameError: name 'feat' is not defined
# Map the predicted class index back to a raga name.
# NOTE(review): the run above raised AttributeError — confirm the mapping
# attribute name against the installed compiam version.
deepsrgm.raga_mapping[predicted_raga]
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
Cell In [17], line 1
----> 1 deepsrgm.raga_mapping[predicted_raga]

AttributeError: 'DEEPSRGM' object has no attribute 'raga_mapping'